services:
  # Agent container: runs `bin/wait` and, once it succeeds, starts
  # deeppavlov_agent with this distribution's pipeline config.
  agent:
    command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/universal_prompted_assistant/pipeline_conf.json'
    environment:
      # Comma-separated host:port pairs, presumably read by `bin/wait` (verify).
      # Written as a folded block scalar: the lines below are joined with
      # single spaces into one string, with no trailing newline.
      # NOTE(review): anthropic-api-claude-v1 (8164) and
      # anthropic-api-claude-instant-v1 (8163) are defined in this file but
      # are not listed here - confirm that this is intentional.
      WAIT_HOSTS: >-
        sentseg:8011, ranking-based-response-selector:8002,
        combined-classification:8087, sentence-ranker:8128,
        transformers-lm-gptjt:8161, openai-api-chatgpt:8145,
        openai-api-davinci3:8131, openai-api-gpt4:8159,
        openai-api-gpt4-32k:8160, openai-api-chatgpt-16k:8167,
        openai-api-gpt4-turbo:8180, gigachat-api:8187,
        dff-universal-prompted-skill:8147
      # Taken from WAIT_TIMEOUT; falls back to 1000 when that variable is
      # unset or empty.
      WAIT_HOSTS_TIMEOUT: "${WAIT_TIMEOUT:-1000}"

  # SentSeg annotator: image built from ./annotators/SentSeg/, Flask app
  # `server` listening on all interfaces, port 8011.
  sentseg:
    build:
      context: ./annotators/SentSeg/
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8011
    deploy:
      resources:
        # Memory limit and reservation are both 1.5G.
        limits:
          memory: 1.5G
        reservations:
          memory: 1.5G

  # Combined classification annotator: built from the repository root with
  # its own Dockerfile; served by a single gunicorn worker on port 8087.
  combined-classification:
    build:
      context: .
      dockerfile: ./annotators/combined_classification/Dockerfile
      args:
        CONFIG: combined_classifier.json
        SERVICE_PORT: 8087
    env_file:
      - .env
    environment:
      - CUDA_VISIBLE_DEVICES=0
    # gunicorn is started with --timeout 600.
    command: gunicorn --workers=1 server:app -b 0.0.0.0:8087 --timeout 600
    deploy:
      resources:
        # Memory limit and reservation are both 2G.
        limits:
          memory: 2G
        reservations:
          memory: 2G

  # Ranking-based response selector: Flask app `server` on port 8002.
  # Its build args point it at the sentence-ranker service on port 8128.
  ranking-based-response-selector:
    build:
      context: .
      dockerfile: ./response_selectors/ranking_based_response_selector/Dockerfile
      args:
        SERVICE_PORT: 8002
        SERVICE_NAME: response_selector
        LANGUAGE: EN
        SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker
        SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond
        SENTENCE_RANKER_TIMEOUT: 3
        N_UTTERANCES_CONTEXT: 5
        FILTER_TOXIC_OR_BADLISTED: 1
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8002
    deploy:
      resources:
        # Memory limit and reservation are both 100M.
        limits:
          memory: 100M
        reservations:
          memory: 100M

  # Sentence ranker: built from ./services/sentence_ranker/ with the
  # sentence-transformers/all-MiniLM-L6-v2 model name passed as a build arg;
  # Flask app `server` on port 8128.
  sentence-ranker:
    build:
      context: ./services/sentence_ranker/
      args:
        SERVICE_PORT: 8128
        SERVICE_NAME: sentence_ranker
        PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/all-MiniLM-L6-v2
    env_file:
      - .env
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8128
    deploy:
      resources:
        # Memory limit and reservation are both 3G.
        limits:
          memory: 3G
        reservations:
          memory: 3G

  # Transformers LM service for togethercomputer/GPT-JT-6B-v1, built with
  # HALF_PRECISION=1; Flask app `server` on port 8161.
  transformers-lm-gptjt:
    build:
      context: .
      dockerfile: ./services/transformers_lm/Dockerfile
      args:
        SERVICE_PORT: 8161
        SERVICE_NAME: transformers_lm_gptjt
        PRETRAINED_MODEL_NAME_OR_PATH: togethercomputer/GPT-JT-6B-v1
        HALF_PRECISION: 1
    env_file:
      - .env
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8161
    deploy:
      resources:
        # Memory limit and reservation are both 50G.
        limits:
          memory: 50G
        reservations:
          memory: 50G

  # openai_api_lm service configured for gpt-3.5-turbo; Flask app `server`
  # on port 8145.
  openai-api-chatgpt:
    build:
      context: .
      dockerfile: ./services/openai_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8145
        SERVICE_NAME: openai_api_chatgpt
        PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8145
    deploy:
      resources:
        # 100M reserved, capped at 500M.
        limits:
          memory: 500M
        reservations:
          memory: 100M

  # openai_api_lm service configured for text-davinci-003; Flask app
  # `server` on port 8131.
  openai-api-davinci3:
    build:
      context: .
      dockerfile: ./services/openai_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8131
        SERVICE_NAME: openai_api_davinci3
        PRETRAINED_MODEL_NAME_OR_PATH: text-davinci-003
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8131
    deploy:
      resources:
        # 100M reserved, capped at 500M.
        limits:
          memory: 500M
        reservations:
          memory: 100M

  # openai_api_lm service configured for gpt-4; Flask app `server` on
  # port 8159.
  openai-api-gpt4:
    build:
      context: .
      dockerfile: ./services/openai_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8159
        SERVICE_NAME: openai_api_gpt4
        PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8159
    deploy:
      resources:
        # 100M reserved, capped at 500M.
        limits:
          memory: 500M
        reservations:
          memory: 100M

  # openai_api_lm service configured for gpt-4-32k; Flask app `server` on
  # port 8160.
  openai-api-gpt4-32k:
    build:
      context: .
      dockerfile: ./services/openai_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8160
        SERVICE_NAME: openai_api_gpt4_32k
        PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-32k
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8160
    deploy:
      resources:
        # 100M reserved, capped at 500M.
        limits:
          memory: 500M
        reservations:
          memory: 100M

  # openai_api_lm service configured for gpt-3.5-turbo-16k; Flask app
  # `server` on port 8167.
  openai-api-chatgpt-16k:
    build:
      context: .
      dockerfile: ./services/openai_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8167
        SERVICE_NAME: openai_api_chatgpt_16k
        PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8167
    deploy:
      resources:
        # 100M reserved, capped at 500M.
        limits:
          memory: 500M
        reservations:
          memory: 100M

  # openai_api_lm service configured for gpt-4-1106-preview; Flask app
  # `server` on port 8180.
  openai-api-gpt4-turbo:
    build:
      context: .
      dockerfile: ./services/openai_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8180
        SERVICE_NAME: openai_api_gpt4_turbo
        PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8180
    deploy:
      resources:
        # 100M reserved, capped at 500M.
        limits:
          memory: 500M
        reservations:
          memory: 100M

  # gigachat_api_lm service configured for GigaChat:1.3.23.1; Flask app
  # `server` on port 8187.
  gigachat-api:
    build:
      context: .
      dockerfile: ./services/gigachat_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8187
        SERVICE_NAME: gigachat_api
        PRETRAINED_MODEL_NAME_OR_PATH: GigaChat:1.3.23.1
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8187
    deploy:
      resources:
        # 100M reserved, capped at 500M.
        limits:
          memory: 500M
        reservations:
          memory: 100M

  # anthropic_api_lm service configured for claude-1; Flask app `server` on
  # port 8164.
  # NOTE(review): this service is not listed in the agent's WAIT_HOSTS -
  # confirm that this is intentional.
  anthropic-api-claude-v1:
    build:
      context: .
      dockerfile: ./services/anthropic_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8164
        SERVICE_NAME: anthropic_api_claude_v1
        PRETRAINED_MODEL_NAME_OR_PATH: claude-1
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8164
    deploy:
      resources:
        # Memory limit and reservation are both 100M.
        limits:
          memory: 100M
        reservations:
          memory: 100M

  # anthropic_api_lm service configured for claude-instant-1; Flask app
  # `server` on port 8163.
  # NOTE(review): this service is not listed in the agent's WAIT_HOSTS -
  # confirm that this is intentional.
  anthropic-api-claude-instant-v1:
    build:
      context: .
      dockerfile: ./services/anthropic_api_lm/Dockerfile
      args:
        SERVICE_PORT: 8163
        SERVICE_NAME: anthropic_api_claude_instant_v1
        PRETRAINED_MODEL_NAME_OR_PATH: claude-instant-1
    env_file:
      - .env
    environment:
      - FLASK_APP=server
    command: flask run -h 0.0.0.0 -p 8163
    deploy:
      resources:
        # Memory limit and reservation are both 100M.
        limits:
          memory: 100M
        reservations:
          memory: 100M

  # Universal prompted skill, built for port 8147. Its default LM endpoint
  # is the transformers-lm-gptjt service on port 8161.
  # No `command` or `environment` is set in this block.
  # NOTE(review): presumably the image's own default command is used - verify.
  dff-universal-prompted-skill:
    build:
      context: .
      dockerfile: ./skills/dff_universal_prompted_skill/Dockerfile
      args:
        SERVICE_PORT: 8147
        SERVICE_NAME: dff_universal_prompted_skill
        GENERATIVE_TIMEOUT: 120
        N_UTTERANCES_CONTEXT: 7
        DEFAULT_LM_SERVICE_URL: http://transformers-lm-gptjt:8161/respond
        DEFAULT_LM_SERVICE_CONFIG: default_generative_config.json
    env_file:
      - .env
    deploy:
      resources:
        # Memory limit and reservation are both 128M.
        limits:
          memory: 128M
        reservations:
          memory: 128M

# Compose file format version; quoted so it is parsed as a string, not a float.
# NOTE(review): recent Docker Compose releases reportedly ignore the top-level
# `version` key - confirm against the Compose version used by this project.
version: '3.7'
